package com.itcast.hadoop.llyy.topkurl;/**
 * Created by Administrator on 2019/4/19 0019.
 */

import com.itcast.hadoop.flowsum.FlowBean;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that emits one (URL, flow) pair for every input record that carries an HTTP URL.
 *
 * <p>Each input line is split on tab characters. A record is used only when it yields more than
 * 32 fields and field 26 is a non-empty string starting with {@code "http"}; fields 30 and 31 are
 * then parsed as the upstream and downstream flow values and written out keyed by the URL.
 *
 * <p>Note: commons-lang {@code StringUtils.split} treats adjacent separators as a single
 * separator, so empty columns are dropped and the indices above refer to non-empty tokens.
 *
 * <p>Sample input record:
 * 1368607180548   1368607186666   10.81.97.92     10.81.97.92     52355                                   GET     173.194.72.139  80      谷歌    工具软件        常用工具        http://www.google-analytics.com/__utm.gif?utmwv=4.9.1mi&utmn=1142909350&utmt=event&utme=5(%E7%94%A8%E6%88%B7%E4%BF%A1%E6%81%AF*%E7%B3%BB%E7%BB%9F*6.1.3)(0)&utmcs=UTF-8&utmsr=768x1024&utmsc=24-bit&utmul=zh-hans-cn&utmac=UA-37902965-1&utmcc=__utma%3D1.1974127687.1339237011.1368535061.1368543014.238%3B&utmht=1368543014217&utmqt=64239008   www.google-analytics.com        google分析      科技    软件    谷歌    3       0       4542    0       200     GoogleAnalytics/1.5.1 (iPad; U; CPU iPhone OS 6.1.3 like Mac OS X; zh-hans-cn)  2   0
 *
 * @author ydf
 * @com kt
 * @create 2019-04-19 3:25 PM
 **/
public class TopkUrlMappper extends Mapper<LongWritable, Text, Text, FlowBean> {

    /** Counter group/name used to report records whose flow fields are not valid numbers. */
    private static final String COUNTER_GROUP = "TopkUrlMappper";
    private static final String MALFORMED_FLOW_COUNTER = "MALFORMED_FLOW_RECORDS";

    /** A record must yield at least this many tokens to be considered (original check: length > 32). */
    private static final int MIN_FIELD_COUNT = 33;
    /** Token index of the URL. */
    private static final int URL_INDEX = 26;
    /** Token index of the upstream flow value. */
    private static final int UP_FLOW_INDEX = 30;
    /** Token index of the downstream flow value. */
    private static final int DOWN_FLOW_INDEX = 31;

    // Output objects are reused across map() calls instead of being allocated per record.
    private final FlowBean flowBean = new FlowBean();
    private final Text k = new Text();

    /**
     * Parses one input line and, if it holds an HTTP URL with numeric flow fields, writes
     * (URL, flowBean) to the context.
     *
     * <p>Records that are too short, have no HTTP URL, or have non-numeric flow fields are skipped;
     * the last case is counted in the {@code MALFORMED_FLOW_RECORDS} counter rather than only
     * being printed to stderr.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of input text
     * @param context task context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if writing the output pair is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = StringUtils.split(value.toString(), "\t");
        if (fields == null || fields.length < MIN_FIELD_COUNT) {
            return;
        }

        String url = fields[URL_INDEX];
        if (StringUtils.isEmpty(url) || !url.startsWith("http")) {
            return;
        }

        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[UP_FLOW_INDEX]);
            downFlow = Long.parseLong(fields[DOWN_FLOW_INDEX]);
        } catch (NumberFormatException e) {
            // Bad input data: skip the record but make it visible in the job counters.
            context.getCounter(COUNTER_GROUP, MALFORMED_FLOW_COUNTER).increment(1);
            return;
        }

        k.set(url);
        flowBean.set("", upFlow, downFlow);

        // Deliberately outside the try block: a failed write must fail the task,
        // not be swallowed as if it were a malformed record.
        context.write(k, flowBean);
    }
}
